Alt + - adds the assignment operator <-.
Ctrl + Shift + C adds # in front of a line (comments it out).
End a comment with ---- (four dashes) to make it a section header, so it is easy to navigate through the script.
Cmd/Ctrl + Shift + M inserts the pipe %>%.
Ctrl + Alt + I inserts a new code chunk.
Plain text
End a line with two spaces to start a new paragraph.
For italics *text* or _text_ (without
gap *text*)
For bold **text**(without gap
**text**)
superscript2 superscript^2^
Strikethrough ~~Strikethrough~~
Adding a web link to a text, e.g., a link to RStudio:
write [text](http://xyz.com), i.e. [text] immediately followed, without a gap, by the pasted link in parentheses.
# Comparison operators; each expression evaluates to a single TRUE or FALSE
1==1 # equal
1!=3 # unequal
13<14 # 13 smaller than 14
14>13 # 14 bigger than 13
12>=0 # 12 greater or equal to zero
12<=3 # 12 smaller or equal to 3 (FALSE here)
i.e. family
# Build the example data frame from three separate vectors; weight and height
# each contain one missing value (NA).
name <- c("saneesh", "sanusha", "appu", "kishan")
weight <- c(63, 48, 20, NA)
height <- c(164, 150, NA, 75)
family <- data.frame(name = name, weight = weight, height = height)
# Show it as a tibble
as_tibble(family)
# The same data frame created in a single call
same.family <- data.frame(
  name = c("saneesh", "sanusha", "appu", "kishan"),
  weight = c(63, 48, 20, NA),
  height = c(164, 150, NA, 75)
)
# Toy survey: communities A and B, each scored for species X, Y and Z
Community <- rep(c("A", "B"), each = 3)
Species <- rep(c("X", "Y", "Z"), times = 2)
Count <- c(100, 0, 50, 50, 30, 40)
df <- data.frame(Community, Species, Count)
# Abundance refers to the total number of individuals of different species
# within each community. It represents the quantity or total count of
# individuals present.
abundance <- df %>%
  group_by(Community) %>%
  summarise(Total_abundance = sum(Count))
# Species richness, on the other hand, refers to the total number of unique
# species present in each community. It represents the diversity of species
# within a community.
# Counting the distinct species with Count > 0 inside summarise() keeps a
# community that has no species present (richness 0) instead of dropping it.
richness <- df %>%
  group_by(Community) %>%
  summarise(Richness = n_distinct(Species[Count > 0]))
# Simulate 20 observations of flower counts per tree and fire treatment, then
# plot the share of observations in each tree x treatment x flower combination.
tree <- c("a", "b", "c", "d")
treatment <- c("fire", "no_fire")
data.frame(
  tree = sample(tree, 20, replace = TRUE),
  treatment = sample(treatment, 20, replace = TRUE),
  flower = rbinom(20, 3, prob = 0.3)
) %>%
  group_by(tree, treatment, flower) %>%
  summarise(count = n(), .groups = "drop") %>%
  # groups were dropped above, so prop is the share of all 20 observations
  mutate(prop = count / sum(count)) %>%
  ggplot(aes(x = flower, y = prop, fill = tree)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity")
  geom_col(position = "dodge") +
  facet_wrap(~treatment)
library(dplyr)
df <- data.frame(tree = c(rep("a", 4), rep("b", 4)), seeds = c(0, 0, 0, 1, 2, 3,
0, 0))
zero_counts <- df %>%
group_by(tree) %>%
summarise(zero_count = sum(seeds == 0))
print(zero_counts)
## # A tibble: 2 × 2
## tree zero_count
## <chr> <int>
## 1 a 3
## 2 b 2
library(tidyverse)
data <- data.frame(sex = c(rep("female", 10), rep("male", 8)), score = c(rnorm(n = 10,
mean = 7.56, sd = 1.978), rnorm(n = 8, mean = 7.75, sd = 1.631)))
data %>%
head(5)
data %>%
group_by(sex) %>%
summarise(score = n()) %>%
mutate(freq = score/sum(score) * 100)
# newdf <- rownames_to_column(df, var = 'name to an unnamed')
library(tidyverse)
# Row-wise tibble: one line per location, columns aligned under their headers.
# (The column name `Lenght` is kept as is because later code refers to it.)
years <- tribble(
  ~Location,        ~Year, ~Month, ~Day, ~Lenght,
  "Sydney",          2000,      9,   15, 12.1213,
  "Athens",          2004,      8,   13, 12.1212,
  "Beijing",         2008,      8,    8,  13.212,
  "London",          2012,      7,   27, 13.1212,
  "Rio de Janeiro",  2016,      8,    5,      65
)
# write.csv(years, file = 'years.csv', row.names = FALSE) # without index use
# row.names = FALSE
# run previous code chunk
library(gt)
years %>%
gt()
| Location | Year | Month | Day | Lenght |
|---|---|---|---|---|
| Sydney | 2000 | 9 | 15 | 12.1213 |
| Athens | 2004 | 8 | 13 | 12.1212 |
| Beijing | 2008 | 8 | 8 | 13.2120 |
| London | 2012 | 7 | 27 | 13.1212 |
| Rio de Janeiro | 2016 | 8 | 5 | 65.0000 |
# Round the Lenght column to two decimals and render a styled gt table
years %>%
  mutate(Lenght = round(Lenght, 2)) %>%
  gt() %>%
  tab_options(
    column_labels.font.size = 11,
    column_labels.font.weight = "bold",
    table.font.size = 10 # no trailing comma: it passed a stray empty argument
  ) %>%
  opt_table_outline(style = "solid", width = px(2))
| Location | Year | Month | Day | Lenght |
|---|---|---|---|---|
| Sydney | 2000 | 9 | 15 | 12.12 |
| Athens | 2004 | 8 | 13 | 12.12 |
| Beijing | 2008 | 8 | 8 | 13.21 |
| London | 2012 | 7 | 27 | 13.12 |
| Rio de Janeiro | 2016 | 8 | 5 | 65.00 |
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# HairEyeColor is a contingency table; data.frame() turns it into one row per
# Hair x Eye x Sex combination with the number of people in `Freq`.
data <- data.frame(HairEyeColor)
data %>%
  # tabyl() counts rows, so expand each combination into `Freq` rows first;
  # otherwise every cell is just the 2 sex levels (25% everywhere).
  tidyr::uncount(Freq) %>%
  tabyl(Hair, Eye) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>%
  knitr::kable()
| Hair | Brown | Blue | Hazel | Green |
|---|---|---|---|---|
| Black | 62.96% (68) | 18.52% (20) | 13.89% (15) | 4.63% (5) |
| Brown | 41.61% (119) | 29.37% (84) | 18.88% (54) | 10.14% (29) |
| Red | 36.62% (26) | 23.94% (17) | 19.72% (14) | 19.72% (14) |
| Blond | 5.51% (7) | 74.02% (94) | 7.87% (10) | 12.60% (16) |
# identify the positions of NAs in the `family` data frame; which() returns
# column-major linear indices into the logical matrix produced by is.na()
which(is.na(family))
## [1] 8 11
colSums(is.na(family))
## name weight height
## 0 1 1
mat <- matrix(sample(c(NA, 1:5), 50, replace = TRUE), 5)
df <- as.data.frame(mat)
df %>%
replace(is.na(.), 0) %>%
View()
see spread & gather
# install.packages('janitor')
library(janitor)
id <- (c(1, 1, 2, 2, 3, 3))
Country <- c("Angola", "Angola", "Botswana", "Botswana", "Zimbabwe", "Zimbabwe")
year <- c("2006", "2007", "2008", "2009", "2010", "2006")
bank.ratio <- c(24, 25, 38, 34, 42, 49)
Reserve.ratio <- c(77, 59, 64, 65, 57, 86)
broad.money <- c(163, 188, 317, 361, 150, 288)
bank <- data.frame(id, Country, year, bank.ratio, Reserve.ratio, broad.money)
bank <- bank %>%
clean_names() # replaced . with _
glimpse(bank)
## Rows: 6
## Columns: 6
## $ id <dbl> 1, 1, 2, 2, 3, 3
## $ country <chr> "Angola", "Angola", "Botswana", "Botswana", "Zimbabwe", …
## $ year <chr> "2006", "2007", "2008", "2009", "2010", "2006"
## $ bank_ratio <dbl> 24, 25, 38, 34, 42, 49
## $ reserve_ratio <dbl> 77, 59, 64, 65, 57, 86
## $ broad_money <dbl> 163, 188, 317, 361, 150, 288
Filter the bank data frame below so that it retains only the countries whose id matches a given set, e.g. keep only the countries with id 1 and 2.
bank %>%
filter(id %in% c(1, 2)) %>%
as_tibble()
Summarise the funds available for each country.
bank %>%
group_by(country) %>%
summarise(fund = sum(broad_money)) %>%
as_tibble()
Rename a column: new name = old name
iris %>%
rename(S.len = Sepal.Length, Sp. = Species) %>%
head(3)
iris %>%
rename_with(tolower) %>%
head(3)
# Keep two columns and lower-case their names. select() + rename_with()
# replaces the superseded select_at(vars(...), .funs) form.
iris %>%
  select(Species, Petal.Length) %>%
  rename_with(tolower) %>%
  head(3)
library(tidyverse)
mtcars <- mtcars %>%
as_tibble(rownames = "cars")
library(tibble)
iris %>%
add_column(ob_no = 1:150) %>%
head(5)
iris %>%
as_tibble() %>%
head(3)
library(gapminder)
summary(gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
str(gapminder)
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
## $ pop : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num [1:1704] 779 821 853 836 740 ...
Change the name of an observation — mutate(variable = recode(variable, ‘old name’ = ‘new name’))
gapminder %>%
mutate(country = recode(country, India = "IND")) %>%
filter(country == "IND") %>%
head(3)
To convert all non-zero numeric values to “Yes” and zero values to “No”:
df <- data.frame(name = c("saneesh", "sanusha", "appu", "jaru"), sex = c(2, 0, 5,
8))
df
# convert numeric values to 'Yes'
df %>%
mutate(sex1 = ifelse(sex != 0, "Yes", "No"))
df %>%
mutate(sex1 = ifelse(sex != 0, "Male", "Female"))
The ifelse() function is used to check whether each
value in the “sex” column is non-zero. If it is, the value is replaced
with “Yes”. If not, the value is replaced with “No”.
gapminder %>%
select(year, country, gdpPercap) %>%
head(3)
msleep %>%
select(starts_with("sleep")) %>%
head(3)
iris %>%
select(-Sepal.Length, -Species) %>%
head(3)
iris %>%
select(-c(Sepal.Length)) %>%
head(3)
iris %>%
select(!Sepal.Length) %>%
head(3)
# ends_with: the section heading had been fused onto `iris`, which produced the
# undefined object `ends_withiris`; it is restored here as a comment.
iris %>%
  select(ends_with("length")) %>%
  head(3)
# starts_with
iris %>%
  select(starts_with("Sepal")) %>%
  head(3)
gapminder %>%
select(year, country, lifeExp) %>%
filter(country == "Eritrea", year > 1950) %>%
head(3)
gapminder %>%
filter(country == "Canada") %>%
head(3) # from gapminder data filter country Canada and show only the first 3 observations
gapminder %>%
filter(country != "Oman") %>%
head(3) # from gapminder data filter all the other countries except Oman
iris %>%
filter(Species != "setosa") %>%
glimpse()
## Rows: 100
## Columns: 5
## $ Sepal.Length <dbl> 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.…
## $ Sepal.Width <dbl> 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.…
## $ Petal.Length <dbl> 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.…
## $ Petal.Width <dbl> 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.…
## $ Species <fct> versicolor, versicolor, versicolor, versicolor, versicolo…
iris %>%
select(Species) %>%
distinct(Species) %>%
filter(Species %in% c("setosa", "versicolor")) %>%
head(3)
using a vector, save the names as a vector and give it to
%in%
target <- c("Hungary", "Iceland", "Mongolia")
gapminder %>%
filter(country %in% target) %>%
head(3)
friends <- data.frame(Names = c("Saneesh", "Appu", "Shruti", "Aradhana", "Arathi",
"James Bond"), age = c(40, 9, 25, 25, 25, 50))
# The data frame `friends` has two columns, Names and age. Names holds
# 'Saneesh', 'Appu', 'Shruti', 'Aradhana', 'Arathi' and 'James Bond'. We want
# to keep only the rows for Appu and James Bond, so we store those two names
# in a vector and pass it to %in%.
target <- c("Appu", "James Bond") # and then
friends %>%
filter(Names %in% target)
# or, spelling out each comparison
friends %>%
filter(Names == "Appu" | Names == "James Bond")
# or, with the vector written inline
friends %>%
filter(Names %in% c("Appu", "James Bond"))
iris %>%
filter(!Species %in% c("setosa", "versicolor")) %>%
glimpse()
## Rows: 50
## Columns: 5
## $ Sepal.Length <dbl> 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.…
## $ Sepal.Width <dbl> 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.…
## $ Petal.Length <dbl> 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.…
## $ Petal.Width <dbl> 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.…
## $ Species <fct> virginica, virginica, virginica, virginica, virginica, vi…
iris %>%
filter(Petal.Width >= 2 & Petal.Width <= 5) %>%
glimpse()
## Rows: 29
## Columns: 5
## $ Sepal.Length <dbl> 6.3, 7.1, 6.5, 7.6, 7.2, 6.5, 6.8, 5.7, 5.8, 6.4, 7.7, 7.…
## $ Sepal.Width <dbl> 3.3, 3.0, 3.0, 3.0, 3.6, 3.2, 3.0, 2.5, 2.8, 3.2, 3.8, 2.…
## $ Petal.Length <dbl> 6.0, 5.9, 5.8, 6.6, 6.1, 5.1, 5.5, 5.0, 5.1, 5.3, 6.7, 6.…
## $ Petal.Width <dbl> 2.5, 2.1, 2.2, 2.1, 2.5, 2.0, 2.1, 2.0, 2.4, 2.3, 2.2, 2.…
## $ Species <fct> virginica, virginica, virginica, virginica, virginica, vi…
library(tidyverse)
library(dplyr)
# Start from the pristine built-in dataset: an earlier step reassigns the
# global `mtcars` to a tibble, which drops the row names, so converting that
# copy would yield rowname = "1", "2", ... and no car would ever match.
mtcars <- datasets::mtcars %>%
  rownames_to_column() # car names move into a column called `rowname`
mtcars %>%
  filter(str_detect(rowname, "Merc")) %>%
  head(3) # filter only 'Merc'
mtcars %>%
  filter(!str_detect(rowname, "Merc")) %>%
  head(3) # filter everything except 'Merc'
To remove or exclude all entries in the “name” column of your data
frame that have 1 in the “pref” column, you can use the
filter() function (together with group_by()) from the
dplyr package.
df <- data.frame(name = c("a", "a", "b", "c", "d", "a", "d"), pref = c(1, 2, 2, 1,
3, 4, 1))
df
df %>%
group_by(name) %>%
filter(!any(pref == 1)) %>%
ungroup()
or, if you have multiple rows with the same name but different values in the “pref” column, the code above will remove all rows with that name if any of them have 1 in the “pref” column. If you want to remove only the rows with 1 in the “pref” column, but keep the other rows with the same name, you can modify the code as follows:
# Row-level condition: drop only the rows whose pref is 1 and keep every
# other row, even when it shares a name with a dropped row.
df %>%
  filter(pref != 1)
iris %>%
pull(Species) %>%
head(3) # returns vector values
## [1] setosa setosa setosa
## Levels: setosa versicolor virginica
iris %>%
select(Species) %>%
head(3) # returns a table with one column
iris %>%
select(everything()) %>%
head(3)
gapminder %>%
filter(country == "Oman" & year > 1980 & year <= 2000) %>%
head(4)
gapminder %>%
select(country, year) %>%
filter(year >= 1980, country == "India" | country == "Oman" | country == "Canada") %>%
head(4)
gapminder %>%
filter(country != "Oman") %>%
head(3) # from gapminder data filter all the other countries except Oman
gapminder %>%
select(-year, -pop) %>%
head(5)
# Countries with the highest life expectancy in 2007.
# arrange() has no `decreasing` argument: `decreasing = TRUE` was treated as
# one more (constant) sort key and the rows stayed in ascending order.
# desc() is the way to sort from high to low.
gapminder %>%
  filter(year == 2007) %>%
  group_by(country) %>%
  summarise(meanLE = mean(lifeExp)) %>%
  arrange(desc(meanLE)) %>%
  head(3)
# Countries with the lowest life expectancy on record; arrange() sorts in
# ascending order by default.
gapminder %>%
  group_by(country) %>%
  summarise(minLE = min(lifeExp)) %>%
  arrange(minLE) %>%
  head(3)
Grouped by continent, then summarise two things: first
n = n(), the number of rows for each continent (i.e. the
size of each group), then the mean of the lifeExp
variable.
gapminder %>%
group_by(continent) %>%
summarise(n = n(), meanLife = mean(lifeExp))
gapminder %>%
group_by(continent) %>%
summarise(PopConti = sum(pop))
pets <- data.frame(names = c(rep("saneesh", 3), rep("appu", 2), "sanusha"), pet = c(rep("dog",
3), rep("cat", 2), "tiger"), number = c(2, 2, 5, 7, 8, 1), size = c(rep("medium",
2), rep("small", 3), "big"))
pets
pets %>%
group_by(pet, size) %>%
summarise(totalpet = sum(number))
## `summarise()` has grouped output by 'pet'. You can override using the `.groups`
## argument.
If we want make a ‘new column’ with values from ‘number’ only if ‘sp.name’ ‘a’ or any other values has the following responses ‘young’ and ‘adult’, if not enter 0 in the ‘new column’.
You need to have groups with any of stage == “young” & “adult” (group level conditions) and stage == “adult” (row-level condition):
library(tidyverse)
plot <- c(rep(1, 2), rep(2, 4), rep(3, 3))
bird <- c("a", "b", "a", "b", "c", "d", "a", "b", "c")
area <- c(rep(10, 2), rep(5, 4), rep(15, 3))
birdlist <- data.frame(plot, bird, area)
birdlist
# summarize the following data frame to a summary table. option 1
birdlist %>%
group_by(plot) %>%
summarise(bird = n(), area = unique(area))
# option 2
birdlist %>%
count(plot, area, name = "bird")
gapminder %>%
summarise(mean(lifeExp))
# range() returns two values (min and max). summarise() is meant to return one
# row per group, and returning more was deprecated in dplyr 1.1.0, so use
# reframe() for multi-row results.
gapminder %>%
  reframe(range(lifeExp))
gapminder %>%
filter(country == "India") %>%
group_by(country) %>%
summarise(GDPmax = max(gdpPercap), GDPmin = min(gdpPercap), GDPmean = mean(gdpPercap))
df <- data.frame(name = c("a", "a", "b", "c"), seedling = c(1, 0, 1, 0), adult = c(0,
5, 0, 1))
df_new <- df %>%
group_by(name) %>%
summarise(seedling = max(seedling, 0), adult = max(adult, 0)) %>%
ungroup()
library(dplyr)
library(hablar)
##
## Attaching package: 'hablar'
## The following object is masked from 'package:forcats':
##
## fct
## The following object is masked from 'package:dplyr':
##
## na_if
## The following object is masked from 'package:tibble':
##
## num
df <- tibble(a = c(1, 1, "a", 2, 2, 2, 4), b = c("a", "a", 1, "b", "b", "b", "c"))
df %>%
print()
## # A tibble: 7 × 2
## a b
## <chr> <chr>
## 1 1 a
## 2 1 a
## 3 a 1
## 4 2 b
## 5 2 b
## 6 2 b
## 7 4 c
df %>%
find_duplicates()
df %>%
distinct() %>%
print()
## # A tibble: 4 × 2
## a b
## <chr> <chr>
## 1 1 a
## 2 a 1
## 3 2 b
## 4 4 c
iris %>%
count(Species, name = "how many")
mtcars %>%
count(am, name = "number") %>%
as_tibble()
mtcars %>%
count(gear, name = "no. gear")
library(dplyr)
# Create a data frame with two columns named 'a' and 'b'
df <- data.frame(a = c("red", "blue", "green"), b = c(1, 2, 3))
# Create a new column named 'c' by combining values from 'a' and 'b'
df <- df %>%
mutate(c = paste(a, b, sep = "_"))
plot <- c(rep(1, 2), rep(2, 4), rep(3, 3))
bird <- as.factor(c("a", "b", "a", "b", "c", "d", "a", "b", "c"))
area <- c(rep(10, 2), rep(5, 4), rep(15, 3))
birdlist <- data.frame(plot, bird, area)
birdlist
# birdlist %>% group_by(plot, area) %>% mutate(count(bird))
birdlist %>%
group_by(plot, area) %>%
summarise(bird = n(), .groups = "drop")
# (dplyr::summarise)like this
# to summarize of a column with reference to two other variables.
treatment <- c(rep("ab", 2), rep("bgrnf", 8), rep("bgpnf", 4))
site <- c(
"ab1",
"ab2",
rep("bgrnf1", 3),
rep("bgrnf2", 2),
"bgrnf3",
"bgrnf4",
"bgrnf5",
rep("bgpnf1", 2),
rep("bgpnf2", 2)
)
data <- data.frame(treatment, site)
# Number of distinct sites in each treatment. count() would return the number
# of rows (e.g. 8 for "bgrnf") although several rows share the same site.
data %>%
  group_by(treatment) %>%
  summarise(`#sites` = n_distinct(site))
year <- c(rep(2000, 4),
rep(2001, 4),
rep(2002, 4)
)
site <- c(rep("a", 3),
rep("b", 3),
rep("c", 3),
rep("d", 3)
)
fire <- c("yes", "no", "yes",
"yes", "no", "no",
"yes", "yes", "yes",
"yes", "yes", "yes")
df <- data.frame(year, site, fire)
# How many sites burnt exactly once, twice or three times?
# Step 1: per site, count the "yes" records within the listed years.
# Step 2: tally the sites by that count (as numeric 1/0 flags).
df %>%
  group_by(site) %>%
  summarise(
    n_burnt = sum(fire == "yes" & year %in% c(2000, 2001, 2002))
  ) %>%
  summarise(
    Burnt_once = sum(as.numeric(n_burnt == 1), na.rm = TRUE),
    Burnt_twice = sum(as.numeric(n_burnt == 2), na.rm = TRUE),
    Burnt_thrice = sum(as.numeric(n_burnt == 3), na.rm = TRUE)
  )
# df %>%
# group_by(site) %>%
# summarize(
# Burnt_once = sum(fire == "yes" &
# year %in% c(2000, 2001, 2002)) == 1, # in these years look for 1 'yes'
# Burnt_twice = sum(fire == "yes" &
# year %in% c(2000, 2001, 2002)) == 2, # in these years look for 2 'yes'
# Burnt_thrice = sum(fire == "yes" &
# year %in% c(2000, 2001, 2002)) == 3 # in these years look for 3 'yes'
# ) %>% # returns a logical vector
# mutate(
# Burnt_once = ifelse(Burnt_once, 1, 0),
# Burnt_twice = ifelse(Burnt_twice, 1, 0),
# Burnt_thrice = ifelse(Burnt_thrice, 1, 0)
# ) %>% # convert logical response to numeric
# summarise( # summarise data
# across( # specifycolumns
# where(is.numeric), # select columns with numeric ones
# ~ sum( # selected column using the ~ formula notation
# .x, # for each selected columns
# na.rm = TRUE))) # remove any missing values before calculating the sum
library(dplyr)
library(stringr)
feedback <-
c("good_book", "good_read", "for knowledge", "adventure")
book <- c("Ramayana", "Bible", "Encyclopedia", "Mbharatha")
df <- data.frame(book, feedback)
df %>%
mutate(response = case_when(str_starts(feedback, "good") ~ "good")) %>%
select(book, response) %>% as_tibble()
names(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
iris %>%
mutate(species.code = case_when(Species == "setosa" ~ 1, Species == "versicolor" ~
2, Species == "virginica" ~ 3)) %>%
head()
library(dplyr)
iris %>%
select(Species) %>%
slice_sample(n = 10) %>%
mutate(code = if_else(Species == "setosa", 1, 0) # you might see different result!
)
df <- data.frame(films = c("Spider_man", "James_bond", "Iron_man", "Bat_man"))
df
df1 <- df %>%
separate(films, c("a", "b"), sep = "([_])")
df1
df1 %>%
unite("names", a:b, remove = FALSE)
df1 <- data.frame(id = c(1:4), films = c("Spider_man", "James_bond", "Iron_man",
"Bat_man"))
df2 <- data.frame(id = c(1:4), country = rep("us", 4))
df3 <- left_join(df1, df2, by = "id")
We are making a wide format from long format in the first example. The second example is to make a long format from wide.
# the following is already in long format
classdata <- data.frame(
studentname = c("captian", "ant", "james", "spider", "tony", "bat", "wonder"),
subject = c("math", "his", "math", "geo", "his", "geo", "math"),
grade = c("A+", "B", "B", "A+", "C", "B+", "C")
)
classdata %>% head()
# Long -> wide. pivot_wider() replaces the superseded spread().
wide.class <- pivot_wider(
  classdata,
  names_from = subject, # values of `subject` become the new column names
  values_from = grade,  # values of `grade` fill the new columns
  names_sort = TRUE     # alphabetical column order (geo, his, math)
)
# classdata = name of the data frame
# subject = new columns to be made
# grade = values to go into new columns
head(wide.class)
# Wide -> long. pivot_longer() replaces the superseded gather().
pivot_longer(
  wide.class,
  cols = c(geo, his, math), # columns the values are gathered from
  names_to = "subject",     # column that receives the old column names
  values_to = "grade",      # column that receives the cell values
  values_drop_na = TRUE     # drop the empty student/subject combinations
)
bind rows
df1 <-
data.frame(
id = c(1:4),
films = c("Spider_man", "James_bond", "Iron_man", "Bat_man")
)
df2 <-
data.frame(
id = c(5:8),
films = c("King Cong", "Silence of the lambs", "Intersteller", "Gravity")
)
dplyr::bind_rows(df1, df2)
For multiple variables
library(tidyverse)
srno <- c(1:2)
film <- c("arabica", "robust")
rate <- c("good", "better")
lang_Eng <- c("yes", "yes")
films <- data.frame(srno, film, rate, lang_Eng)
str(films)
## 'data.frame': 2 obs. of 4 variables:
## $ srno : int 1 2
## $ film : chr "arabica" "robust"
## $ rate : chr "good" "better"
## $ lang_Eng: chr "yes" "yes"
films <- films %>%
mutate(across(c(rate, lang_Eng), as.factor))
str(films)
## 'data.frame': 2 obs. of 4 variables:
## $ srno : int 1 2
## $ film : chr "arabica" "robust"
## $ rate : Factor w/ 2 levels "better","good": 2 1
## $ lang_Eng: Factor w/ 1 level "yes": 1 1
Select a key variable and everything or every other columns.
library(gapminder)
gapminder %>%
select(pop, everything()) %>%
head(3)
library(stringr)
data <- data.frame(Dose.Cm = c("d1", "D2", "D3"), Len.km = c("High", "low", "Low"))
glimpse(data)
## Rows: 3
## Columns: 2
## $ Dose.Cm <chr> "d1", "D2", "D3"
## $ Len.km <chr> "High", "low", "Low"
data %>%
mutate(Dose.Cm = tolower(Dose.Cm), Len.km = toupper(Len.km))
data <- data.frame(Dose.Cm = c("d1", "D2", "D3"), Len.km = c("high", "low", "medium"))
data <- data %>%
mutate(len = as.factor(Len.km))
glimpse(data)
## Rows: 3
## Columns: 3
## $ Dose.Cm <chr> "d1", "D2", "D3"
## $ Len.km <chr> "high", "low", "medium"
## $ len <fct> high, low, medium
data %>%
mutate(len = fct_relevel(len, c("low", "medium", "high")))
This drops any non-numeric characters before or after the first number. The grouping mark specified by the locale is ignored inside the number.
library(tidyverse)
class <- c("8th", "9th", "10th")
students <- c("25-30", "35-41", "21-28")
school <- data.frame(class, students)
school
glimpse(school) # notice students is a binned variable it is a not a numeric.
## Rows: 3
## Columns: 2
## $ class <chr> "8th", "9th", "10th"
## $ students <chr> "25-30", "35-41", "21-28"
school %>%
mutate(students = parse_number(students)) %>%
glimpse()
## Rows: 3
## Columns: 2
## $ class <chr> "8th", "9th", "10th"
## $ students <dbl> 25, 35, 21
school %>%
mutate(students = parse_number(students))
# students is now numeric: parse_number() kept the first number of each range
library(tidyverse)
rawdata <- data.frame(species_1 = rnorm(n = 40, mean = 300, sd = 18.5), species_2 = rnorm(40,
305, 16.7))
data <- pivot_longer(data = rawdata, cols = species_1:species_2, names_to = "species",
values_to = "weight")
library(tidyverse)
df <- data.frame(name = c("saneesh", "sanusha", "appu", "jaru"), fav.no = c(11, 7,
20, 21), animal = c("human", "human", "human", "dog"))
df %>%
pivot_wider(names_from = "animal", values_from = "fav.no")
# but when we have similar names in the grouping column
df1 <- data.frame(name = c("saneesh", "sanusha", "appu", "jaru", "saneesh"), fav.no = c(11,
7, 20, 21, 12), animal = c("human", "human", "human", "dog", "human"))
df1 %>%
pivot_wider(names_from = "animal", values_from = "fav.no")
## Warning: Values from `fav.no` are not uniquely identified; output will contain
## list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
## {data} |>
## dplyr::summarise(n = dplyr::n(), .by = c(name, animal)) |>
## dplyr::filter(n > 1L)
# because saneesh is repeated twice but with two fav.nos the solution is to add
# a row id, make pivot wide and get rid of the row id
df1 %>%
mutate(id = row_number()) %>%
group_by(name) %>%
pivot_wider(names_from = "animal", values_from = "fav.no", values_fill = 0) %>%
select(-id)
library(tidyverse)
# Split 1..10 into three bins of (roughly) equal size, keep the bin number as
# a factor (test1) and give the bins readable labels (test2).
numbers <- data.frame(test = 1:10)
numbers <- numbers %>%
  mutate(
    test1 = as.factor(as.numeric(cut_number(test, 3))),
    test2 = recode(test1, "1" = "low", "2" = "medium", "3" = "high")
  )
library(ggplot2)
# Filled, outlined points coloured by species.
# alpha must be set in the geom: as an extra argument to ggplot() it is
# silently ignored. Raise the value for more opaque points.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width, fill = Species)) +
  geom_point(
    size = 4,
    shape = 21,
    color = "black",
    stroke = 1.5,
    alpha = 0.07
  )
df <- data.frame(dose = c("D0.5", "D1", "D2"), len = c(4.2, 10, 29.5))
library(ggplot2)
# Basic barplot
p <- ggplot(data = df, aes(x = dose, y = len)) + geom_bar(stat = "identity")
p
# Horizontal bar plot p + coord_flip()
# Change the width of bars
ggplot(data = df, aes(x = dose, y = len)) + geom_bar(stat = "identity", width = 0.5)
# Change colors
ggplot(data = df, aes(x = dose, y = len)) + geom_bar(stat = "identity", color = "blue",
fill = "white")
# Minimal theme + blue fill color
p <- ggplot(data = df, aes(x = dose, y = len)) + geom_bar(stat = "identity", fill = "steelblue") +
theme_minimal()
p
# value labels just above (outside) the bars
p + geom_text(aes(label = len), vjust = -0.3, size = 3.5) + theme_minimal()
# value labels inside the bars, in white
p + geom_text(aes(label = len), vjust = 1.6, color = "white", size = 3.5) + theme_minimal()
df <- data.frame(dose = c("D0.5", "D1", "D2", "pp", "kk", "rr"), len = c(4.2, 10,
29.5, 12, 15, 23))
library(ggplot2)
ggplot(df, aes(len)) + geom_density() + geom_vline(aes(xintercept = mean(len)), col = "red",
linetype = "dashed")
library(ggplot2)
ggplot(iris, aes(Petal.Length, Petal.Width)) + geom_point() + geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
library(ggdist)
library(tidyverse)
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Raincloud plot (half violin + boxplot + dots) of highway mileage by number
# of cylinders.
mpg %>%
  filter(cyl %in% c(4, 6, 8)) %>%
  ggplot(aes(
    x = factor(cyl),
    y = hwy,
    fill = factor(cyl)
  )) +
  # add half violin from `ggdist` package
  ggdist::stat_halfeye(
    # custom bandwidth
    adjust = 0.5,
    # move geom to right
    justification = -0.2,
    # remove slab interval
    .width = 0,
    point_color = NA
  ) +
  # add boxplot
  geom_boxplot(
    width = 0.12,
    # remove outliers
    outlier.colour = NA,
    alpha = 0.5
  ) +
  # add dot plots from `ggdist` package
  ggdist::stat_dots(
    # orientation of the plot
    side = "left",
    # move geom to the left
    justification = 1.1,
    # adjust grouping of observation
    binwidth = 0.25
  ) +
  # adjust theme
  scale_fill_tq() +
  theme_tq() +
  # labs() names aesthetics, not screen positions: x is factor(cyl) and y is
  # hwy, even though coord_flip() draws x vertically.
  labs(
    title = "raincloud plot",
    subtitle = "showing bimodal distribution of 6 cylinder vehicles",
    x = "cylinders",
    y = "highway fuel efficiency"
  ) +
  coord_flip()
library(tidyverse)
# install.packages("hexbin")
# Toy data: eight records for one class, each with a student-count range
# (stored as text) and a latitude/longitude pair.
class <- rep("10th", times = 8)
students <- c(
  "10 to 15", "15-20", "17 to 24", "20 to 25",
  "25 to 30", "30 to 40", "45 to 47", "50 to 55"
)
latitude <- c(
  11.50897246, 11.48323136, 11.48719031, 11.46366611,
  11.41097322, 11.52111154, 11.44491386, 11.46569568
)
longitude <- c(
  76.06032062, 76.06192685, 76.04266851, 76.04156575,
  76.05075092, 76.02846331, 76.03084141, 76.01766216
)
school <- data.frame(class, students, latitude, longitude)

# Turn each text range into a number, then summarise the student count over
# hexagonal bins (stat_summary_hex() needs the "hexbin" package installed).
ggplot(
  mutate(school, students = parse_number(students)),
  aes(latitude, longitude, z = students)
) +
  stat_summary_hex() +
  scale_fill_viridis_c(alpha = 0.8) +
  labs(fill = "students", title = "school students")
## Warning: Computation failed in `stat_summary_hex()`
## Caused by error in `compute_group()`:
## ! The package "hexbin" is required for `stat_summary_hex()`
# Boxplot whose axis titles use plotmath: `text[subscript]` renders a
# subscript and `text^superscript` renders a superscript.
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot() +
  xlab(expression(text[subscript])) +
  ylab(expression(text^superscript))
library(ggplot2)
# Spell the argument out in full: `warn` only works via partial matching.
library(dplyr, warn.conflicts = FALSE)

# Petal length vs. width for every species except setosa, one facet per
# species; facet strips have no background and a left-aligned 11 pt label.
iris %>%
  filter(Species != "setosa") %>%
  ggplot(aes(x = Petal.Length, y = Petal.Width)) +
  geom_point() +
  facet_wrap(~Species) +
  theme(
    strip.background.x = element_blank(),
    strip.text.x = element_text(hjust = 0, size = 11)
  )
# Simulated incomes for two villages (20 draws each from the same normal
# distribution), shown as boxplots with each group's mean marked in red.
income.data <- data.frame(
  Village = rep(c("Chittor", "Bellari"), each = 20),
  Income = c(
    rnorm(n = 20, mean = 1000, sd = 150),
    rnorm(n = 20, mean = 1000, sd = 150)
  )
)
library(ggplot2)
ggplot(income.data, aes(Village, Income)) +
  geom_boxplot() +
  stat_summary(geom = "point", fun = mean, col = "red")
# Simulated incomes for two villages (20 draws each from the same normal
# distribution), shown as per-village density curves.
income.data <- data.frame(
  Village = c(rep("Chittor", 20), rep("Bellari", 20)),
  Income = c(
    rnorm(n = 20, mean = 1000, sd = 150),
    rnorm(n = 20, mean = 1000, sd = 150)
  )
)
library(ggplot2)

# The data are re-simulated on every run, so the per-village means are
# computed here instead of being typed in as fixed numbers.
village_means <- aggregate(Income ~ Village, data = income.data, FUN = mean)

ggplot(income.data) +
  # dashed line: overall mean income
  geom_vline(xintercept = mean(income.data$Income), linetype = "dashed") +
  geom_density(aes(x = Income, color = Village)) +
  # dotted lines: mean income of each village, coloured like its density curve
  geom_vline(
    data = village_means,
    aes(xintercept = Income, color = Village),
    linetype = "dotted"
  )
library(tidyverse)
# Using median: order the vehicle classes by their median highway mileage.
# The x aesthetic uses the reordered factor as is; wrapping it in
# reorder(class, hwy) would sort by the mean and discard the median ordering.
mpg %>%
  mutate(class = fct_reorder(class, hwy, .fun = median)) %>%
  ggplot(aes(x = class, y = hwy, fill = class)) +
  geom_boxplot() +
  xlab("") + # blank x-axis title
  theme(legend.position = "none")
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Counts per plant family, drawn as a donut chart (a pie with a 40% hole).
data <- data.frame(
  category = c(
    "Poaceae", "Fabaceae", "Asteraceae", "Acanthaceae",
    "Rubiaceae", "Euphorbiaceae", "Others"
  ),
  count = c(18, 15, 8, 4, 4, 3, 17)
)
fig <- data %>%
  plot_ly(labels = ~category, values = ~count)
fig <- fig %>%
  add_pie(hole = 0.4) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  layout(title = "Donut charts using Plotly", showlegend = TRUE)
fig
# Dummy data: five named bars, each with a random value and a spread `sd`
data <- data.frame(
  name = letters[1:5],
  value = sample(seq(from = 4, to = 15), size = 5),
  sd = c(1, 0.2, 3, 2, 4)
)
# Most basic error bar
library(viridis)
## Loading required package: viridisLite
# Bars show `value`; error bars span value - sd to value + sd.
# NOTE(review): fill is a constant here, not a mapped aesthetic, so the
# viridis fill scale appears to have no visible effect - confirm.
ggplot(data, aes(x = name)) +
  geom_col(aes(y = value), fill = "skyblue", alpha = 0.7) +
  scale_fill_viridis_d() +
  geom_errorbar(
    aes(ymin = value - sd, ymax = value + sd),
    width = 0.4,
    colour = "orange",
    alpha = 0.9,
    linewidth = 1.3
  )
library(tidyverse)
# Four height observations, two per gender
df <- tribble(
  ~gender,  ~height,
  "male",   12,
  "male",   8,
  "female", 11.5,
  "female", 11
)

# Scatter plot with a red italic label and two dashed blue segments that
# both end at the same point (x = 1.3, y = 11.3).
ggplot(df, aes(gender, height)) +
  geom_point() +
  annotate(
    "text",
    x = 1.29, y = 11.4,
    label = "short person",
    color = "red", size = 3, fontface = "italic"
  ) +
  annotate(
    "segment",
    x = 1.05, xend = 1.3,  # start and end on x; together they set the length
    y = 11.02, yend = 11.3, # start and end on y
    color = "blue", linetype = "dashed"
  ) +
  annotate(
    "segment",
    x = 1.95, xend = 1.3,  # start and end on x; together they set the length
    y = 8.2, yend = 11.3,   # start and end on y
    color = "blue", linetype = "dashed"
  )
library(lubridate)
# Abbreviated month names, Jan to Dec. The built-in constant gives the same
# vector as indexing month.abb with 1:12.
months <- month.abb
# One temperature value per month, in calendar order
temperature <- c(10, 12, 22, 32, 35, 30, 33, 28, 29, 25, 19, 14)
myframe <- data.frame(months, temperature) # creating a new data frame
library(tidyverse)
myframe %>% glimpse()
## Rows: 12
## Columns: 2
## $ months <chr> "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "S…
## $ temperature <dbl> 10, 12, 22, 32, 35, 30, 33, 28, 29, 25, 19, 14
library(ggplot2)
# Monthly temperature as one blue line (group = 1 joins all points) with a
# red point per month.
ggplot(myframe, aes(x = months, y = temperature, group = 1)) +
  geom_line(col = "blue") +
  geom_point(col = "red") +
  labs(title = "Temperature of months") +
  scale_x_discrete(limits = month.abb) # this will order months on the x axis
# Build the sales table, parse the month/day/year strings into Date values,
# sort the rows chronologically, then print the result.
df <- data.frame(
  date = c("05/30/2021", "08/18/2021", "09/13/2021", "02/19/2021"),
  sales = c(3, 15, 14, 9)
) %>%
  mutate(date = as.Date(date, format = "%m/%d/%Y")) %>%
  arrange(date)
df
# Show only the "D0.5" and "D2" categories on the discrete x axis.
# NOTE(review): `p` is not assigned anywhere above this line in the visible
# part of the script (it is assigned further down); presumably this call
# relies on a plot object created in an earlier section - confirm.
p + scale_x_discrete(limits = c("D0.5", "D2"))
## Warning: Removed 1 rows containing missing values (`position_stack()`).
# Length (`len`) for two supplements at three dose levels
df2 <- data.frame(
  supp = rep(c("VC", "OJ"), each = 3),
  dose = rep(c("D0.5", "D1", "D2"), 2),
  len = c(6.8, 15, 33, 4.2, 10, 29.5)
)
# Side-by-side bars per dose, each labelled with its value in white text
p <- ggplot(data = df2, aes(x = dose, y = len, fill = supp)) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = len),
    vjust = 1.6,
    color = "white",
    position = position_dodge(0.9),
    size = 3.5
  ) +
  scale_fill_brewer(palette = "Paired") +
  theme_minimal()
# Stacked barplot with multiple groups (stacking is the default position)
ggplot(data = df2, aes(x = dose, y = len, fill = supp)) +
  geom_col()
# Use position=position_dodge() to place the groups side by side
ggplot(data = df2, aes(x = dose, y = len, fill = supp)) +
  geom_col(position = position_dodge())
# Change the colors manually: start from a dodged chart with black outlines
p <- ggplot(data = df2, aes(x = dose, y = len, fill = supp)) +
  geom_col(color = "black", position = position_dodge()) +
  theme_minimal()
# Use custom colors
p + scale_fill_manual(values = c("#999999", "#E69F00"))
# Use brewer color palettes
p + scale_fill_brewer(palette = "Blues")
libraries
# install.packages('MetBrewer')
library(MetBrewer)
Draw a scatter plot with GDP per capita on the x-axis and life expectancy on the y-axis. The numerical variable Population controls the size of each point.
# Base plot for the 2007 records: axis and legend titles plus theme only.
# The point layers are added in the examples that follow.
plot <- ggplot(filter(gapminder, year == 2007)) +
  theme_minimal() +
  labs(
    x = "GDP per Capita",
    y = "Life Expectancy",
    color = "Population in millions",
    size = "Population in millions"
  )
# Point size encodes population in millions
plot +
  geom_point(aes(x = gdpPercap, y = lifeExp, size = pop / 1e6))
To use color in the plot, assign the Population variable to the color aesthetic. Since nothing is specified, ggplot2 chooses a color spectrum for this numerical variable (shades of blue).
# Population (in millions) drives both point size and point colour
plot +
  geom_point(
    aes(x = gdpPercap, y = lifeExp, size = pop / 1e6, color = pop / 1e6)
  )
To control the color spectrum, we need to introduce a color scale. In the following plot, we have to provide a vector of hex color values. You would choose this if you got your colors from one of the mentioned above websites.
# Same points, coloured along a gradient built from five hex colours
plot +
  geom_point(
    aes(x = gdpPercap, y = lifeExp, size = pop / 1e6, color = pop / 1e6)
  ) +
  scale_color_gradientn(
    colors = c("#003049", "#D62828", "#F77F00", "#FCBF49", "#EAE2B7")
  )
To apply one of the MetBrewer palettes, replace the hex vector with a call to the MetBrewer function. Within the function call, you provide the palette’s name, then the number of colors, and tell it that we need a continuous palette since it is a numerical variable.
# Same points, coloured with 500 steps of the continuous "Cross" palette
plot +
  geom_point(
    aes(x = gdpPercap, y = lifeExp, size = pop / 1e6, color = pop / 1e6)
  ) +
  scale_color_gradientn(
    colors = met.brewer("Cross", n = 500, type = "continuous")
  )
You might also want to use color palettes with non-numerical variables. Let us assume we want to apply color to the Continent variable. This implies using a manual color scale and providing a MetBrewer palette.
# Colour by continent (a categorical variable) with five "Navajo" colours
plot +
  geom_point(
    aes(x = gdpPercap, y = lifeExp, size = pop / 1e6, color = continent)
  ) +
  scale_color_manual(values = met.brewer("Navajo", 5))
Please note: if you want to apply color to the fill aesthetic rather than the color aesthetic, use the scale_fill_manual function instead of scale_color_manual. This is useful for boxplots or bar charts.
# GDP per capita by continent (records below 60000), with boxes filled by
# continent using the MetBrewer "Navajo" palette via the fill scale.
# `year` is no longer mapped to colour: the boxplot statistic dropped that
# numeric mapping and only produced a "dropped aesthetics" warning.
gapminder %>%
  filter(gdpPercap < 60000) %>%
  ggplot(aes(continent, gdpPercap, fill = continent)) +
  geom_boxplot() +
  scale_fill_manual(values = met.brewer("Navajo", 5)) +
  theme_minimal() +
  labs(x = "Continent", y = "GDP per Capita", fill = "Continent")
# Nectar pH measured after adding bacteria, yeast, or nothing
df <- data.frame(
  Names = as.factor(c("Bacteria", "Yeast", "None")),
  Quantity = c(2.5, 5.5, 7.5)
)
library(ggplot2)
library(tidyverse)
# Fix the bar order explicitly instead of the default alphabetical levels
df <- df %>%
  mutate(Names = fct_relevel(Names, c("Bacteria", "Yeast", "None")))
ggplot(df, aes(Names, Quantity, fill = Names)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c("#110a62", "#fcd749", "#b5b4b5")) +
  labs(y = "Nectar pH", x = "Microbe added to nectar") +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.ticks.x = element_blank(),
    axis.line.x = element_blank(),
    axis.text = element_text(size = 22, color = "black"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0
    axis.ticks = element_line(linewidth = 1, color = "black"),
    axis.ticks.length = unit(0.5, "cm"),
    text = element_text(size = 22)
  )
# ggThemeAssist::ggThemeAssistGadget(name of the plot)
x11() # open a new window for graphics
graphics.off() # close the new window (shuts down all open graphics devices)
Normal distribution, also known as the Gaussian distribution, is a probability distribution that is symmetric about the mean, showing that data near the mean are more frequent in occurrence than data far from the mean.
library(tidyverse)
# Simulate n heights from a normal distribution and overlay the theoretical
# curve on the histogram.
# NOTE(review): `mean` and `sd` are kept as variable names because the
# density function below reads them; they share names with the base functions.
n <- 1000
mean <- 170 # cm
sd <- 6.35 # cm
binwidth <- 0.3
set.seed(1234)
df <- data.frame(x = rnorm(n, mean, sd))
# Only `x` is mapped in aes(); mean, sd, binwidth and n are not aesthetics.
ggplot(df, aes(x = x)) +
  theme_bw() +
  # `linewidth` replaces `size` for line widths (deprecated in ggplot2 3.4.0)
  geom_histogram(
    binwidth = binwidth,
    colour = "white",
    fill = "lightblue",
    linewidth = 0.1
  ) +
  # Density scaled by n * binwidth so the curve is on the count scale
  stat_function(
    fun = function(x) dnorm(x, mean = mean, sd = sd) * n * binwidth,
    color = "darkred",
    linewidth = 1
  )
library(googlesheets4)
library(dplyr)
library(wordcloud)
## Loading required package: RColorBrewer
##
## Attaching package: 'wordcloud'
## The following object is masked from 'package:PerformanceAnalytics':
##
## textplot
library(RColorBrewer)
# gs4_auth()
# Sharing URL of the Google Sheet to read
path <- "https://docs.google.com/spreadsheets/d/1ac8CuAQdRNXp9MjKsG7YWiHcT64tRgnCqlY9UhX-jEo/edit?usp=sharing"
# Download the sheet into a data frame
test <- read_sheet(ss = path)
## ! Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to
## the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## <https://gargle.r-lib.org/articles/non-interactive-auth.html>
## ℹ The googlesheets4 package is using a cached token for 'cssaneesh@gmail.com'.
## ✔ Reading from "wordcloud".
## ✔ Range 'Sheet1'.
head(test, 3)
# Tally how often each topic occurs (`n`) and scale the tally by 10
# (`count`) to use as the word frequency.
test1 <- test %>%
  count(topic) %>%
  mutate(count = n * 10) %>%
  as.data.frame()
head(test1, 3)
max(test1$count)
## [1] 90
# Fix the seed so the word placement is reproducible
set.seed(123)
wordcloud(
  words = test1$topic,
  freq = test1$count,
  min.freq = 10,
  max.words = 50,
  colors = brewer.pal(7, "BrBG")
)
# export the file as .pdf
# Five people with sex, weight and a favourite number
df <- data.frame(
  name = c("saneesh", "kishan", "anil", "mahi", "sanusha"),
  sex = c("male", "female", "male", "male", "female"),
  weight = c(60, 58, 65, 70, 48),
  favno = c(2, 6, 10, 1, 15)
)
# Every example maps colour, size AND shape, so each guides() call below has
# a legend to remove (without `shape = sex` there is no shape legend at all).
ggplot(df, aes(x = sex, y = weight, col = name, size = favno, shape = sex)) +
  geom_point()
# remove all legends
ggplot(df, aes(x = sex, y = weight, col = name, size = favno, shape = sex)) +
  geom_point() +
  theme(legend.position = "none")
# remove legend created by color
ggplot(df, aes(x = sex, y = weight, col = name, size = favno, shape = sex)) +
  geom_point() +
  guides(color = "none")
# remove legend created by shape
ggplot(df, aes(x = sex, y = weight, col = name, size = favno, shape = sex)) +
  geom_point() +
  guides(shape = "none")
# remove legend created by size
ggplot(df, aes(x = sex, y = weight, col = name, size = favno, shape = sex)) +
  geom_point() +
  guides(size = "none")
# install.packages('ggflowchart')
library(ggflowchart)
# Edge list for the flowchart: one row per arrow, from `from` to `to`
data <- tibble::tribble(
  ~from, ~to,
  "A",   "B",
  "A",   "C",
  "A",   "D",
  "B",   "E",
  "C",   "F",
  "F",   "G"
)
ggflowchart(data)
# Faces of a six-sided die
dice <- 1:6

# Roll the die once.
#   x: unused; kept so the existing signature still works.
# Returns one value drawn at random from `dice`.
myluck <- function(x) {
  # TRUE rather than T (T can be reassigned); the result is returned directly
  # instead of going through a local variable named like the function.
  sample(dice, size = 1, replace = TRUE)
}
myluck()
## [1] 6
# Candidate names to draw from
names <- c("saneesh", "appu", "sanusha")

# Pick one name at random.
#   x: unused; kept so the existing signature still works.
# Returns one element of `names`.
who <- function(x) {
  # TRUE rather than T (T can be reassigned); the result is returned directly
  # instead of going through a local variable named like the function.
  sample(names, size = 1, replace = TRUE)
}
who()
## [1] "saneesh"
library(dagitty)
##
## Attaching package: 'dagitty'
## The following object is masked from 'package:hablar':
##
## convert
# DAG with three arrows into RCD: from Treatment, Livestock and Trench
sapling <- dagitty("dag{
Treatment-> RCD <- Livestock;
Trench -> RCD
}")

# Node positions for plotting.
coordinates(sapling) <- list(
  x = c(
    Treatment = 1, # column 1
    Livestock = 2, # column 2
    Trench = 2,    # column 2
    RCD = 2        # column 2
  ),
  y = c(
    Treatment = 0,  # middle row
    RCD = 0,        # middle row
    Livestock = -1, # above the middle row
    Trench = 1      # below the middle row
  )
)
plot(sapling)
df <- data.frame(name = as.factor(c("James Bond", "Spider Man", "Iron Man")))
# df <- df %>% separate(name, c('Genus', 'Species'), sep = '([ ])')

# Split the `name` column of `df` on a space into `Genus` and `Species`
# columns and print the result; the return value is that of print().
shorten <- function(df) {
  print(separate(df, name, c("Genus", "Species"), sep = "([ ])"))
}
shorten(df)
## Genus Species
## 1 James Bond
## 2 Spider Man
## 3 Iron Man
library(ggplot2)
library(dplyr)
# Simulated weight and height for 20 males and 20 females
data <- data.frame(
  sex = rep(c("male", "female"), each = 20),
  weight = c(
    rnorm(20, mean = 65, sd = 5),
    rnorm(20, mean = 55, sd = 5)
  ),
  height = c(
    rnorm(20, mean = 165, sd = 6),
    rnorm(20, mean = 152, sd = 6)
  )
)
# Plot the interaction using ggplot2: one linear fit per sex, no ribbon
ggplot(data, aes(x = height, y = weight, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
# page <-
# read_html('https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population')
# tables <- html_table(page)
# typeof(tables)
# unlist(tables)
# table2 <- as.data.frame(tables[[2]])
# head(table2, 2)
To apply the same options to every chunk in the file,
write this inside a chunk:
knitr::opts_chunk$set(include= ,echo = , message= , warning= )
# knitr::opts_chunk$set(message = TRUE, echo = TRUE, warning = TRUE)
include: show or hide both the code and its results in the output
echo: show or hide the code in the output; the result is still shown
message: show or hide the messages generated by the code
warning: show or hide the warnings generated by the code
These options can also be set for individual chunks.
## [1] 5
1 # heading 1
2 ## heading 2
3 ### heading 3
italics
italic
bold
bold
plot() to show r code/function
@Saneesh
this is a blockquote
— Saneesh
hello
# [mathematical
# notations](https://rpruim.github.io/s341/S19/from-class/MathinRmd.html)
\(by\) $by$
\(\mu\) $\mu$
\(\sum\) $\sum$
\(a\pm b\) $a\pm b$
\(x=y\) $x=y$
\(x>y\) $x>y$
\(x^2\) $x^2$
\(x\le y\) $x\le y$
\(\sum_{n=1}^{10} n^2\)
$\sum_{n=1}^{10} n^2$
\(LUI_i=\frac12(gi/gm)+\frac12(ti/tm)\)
$LUI_i=\frac12(gi/gm)+\frac12(ti/tm)$
\(x_{1}+ x_{2}+\cdots+x_{n}\)
$x_{1}+ x_{2}+\cdots+x_{n}$
\(|A|\) $|A|$
\(A\subset B\)
$A\subset B$
\(A \subseteq B\)
$A \subseteq B$
\(A \cup B\)
$A \cup B$
\(A \cap B\)
$A \cap B$
\(P(A|B)\) $P(A|B)$
\(\alpha\) $\alpha$
\(\beta\) $\beta$
\(\gamma\) $\gamma$
\(\theta\) $\theta$
\(H_2O\) $H_2O$
write
Inside a chunk, after the three backticks, write {r,
echo=FALSE,out.width="70%",fig.align="center",fig.cap='write'
then close the curly bracket, then write knitr::include_graphics("Idly.jpg")
# keep the image in the project folder, then close the chunk with
‘```’
Alternatively, write an exclamation mark !, then square brackets
[caption] with the caption inside, then normal brackets
(Idly.jpg) with the name of the file and its extension,
i.e., Idly.jpg